## Authors: Alexander Herzog and Kenneth Benoit
## Date: May 30, 2015
## Replication file for JOP article "The Most Unkindest Cuts: Speaker Selection and Expressed Government Dissent During Economic Crisis"

# Start from an empty workspace so that objects left over from an earlier
# session cannot leak into this replication run. The argument is spelled out
# as `all.names` (rather than the partially matched `all`) so that objects
# whose names start with a dot are removed as well.
rm(list = ls(all.names = TRUE))

### PATHS ########################################
dataInPath <- "./generated_data/master_data.RData"
dataOutAll <- "./generated_data/working_data_all.RData"
dataOutSub <- "./generated_data/working_data_speakers.RData"
##################################################

# load() restores the objects stored in the file into the workspace; the rest
# of this script works on a data frame called `data`.
load(dataInPath)

# `data` is also the name of a function in the utils package. If the file did
# not supply a data frame of that name, the subsetting below would fail with
# an unrelated-looking error, so stop here with a clear message instead.
if (!is.data.frame(data)) {
    stop("'", dataInPath, "' did not provide a data frame named 'data'",
         call. = FALSE)
}

# ==================
# = Case selection =
# ==================
# remove finance ministers and opposition spokespersons
# Rows are dropped with a logical mask instead of `-which(...)`: when no row
# matches, which() returns integer(0) and `data[-integer(0), ]` selects zero
# rows, which would silently empty the whole data set. `%in% TRUE` maps an NA
# comparison to FALSE, so rows with a missing flag are kept, just as which()
# kept them.
isFinanceMinister <- (data$finance_minister == TRUE) %in% TRUE
data <- data[!isFinanceMinister, ]
isOppositionSpokesperson <- (data$opposition_spokesperson == TRUE) %in% TRUE
data <- data[!isOppositionSpokesperson, ]

# remove Cheann Comhairle and Leas Cheann Comhairle (for safety)
data <- data[!(data$position %in% c("Cheann Comhairle","Leas Cheann Comhairle")), ]
# rebuild the factor so the levels no longer include positions that were removed
data$position <- factor(data$position)

# remove those who were not elected yet (by-elections)
# which() skips NA comparisons, so a missing flag drops the row instead of
# producing an all-NA row (such rows were discarded by the missing-value
# filter below anyway).
data <- data[which(data$notElectedYet == 0), ]

# remove those who resigned or died in office
data <- data[which(data$removed == 0), ]

# remove cases with missing values
# Note: election data is missing for Ceann Comhairle, who are automatically
# re-elected to the Dail. Because we exclude them from the analysis, missing
# values can be safely removed.
hasMissingElectionData <- is.na(data$first_preference_votes) | is.na(data$quota)
data <- data[!hasMissingElectionData, ]


# ====================
# = Variable coding =
# ====================
# who spoke? 1 where a text score is present for the row, 0 otherwise
data$spoke <- replace(
    numeric(nrow(data)),
    which(!is.na(data$textscore)),
    1
)

# social need measure (proportion of constituents on the live register),
# computed twice against the same population figure:
# - from the current year's live register count
data$lr_prop_current_year <- data$lr_abs / data$population

# - from the lagged (previous year's) live register count
data$lr_prop_previous_year <- data$lr_abs_lag / data$population


# adjust for change in debate year
# Note: unemployment data was merged by debate year.
# - for budgets 1987-1997, debates were held in January or February of
#   the same year's budget. Hence we use the previous year's unemployment
#   rate.
# - for budgets 1998-2013, debates were held in December for the next
#   year's budget. Hence we use the debate year's unemployment rate.
# Rows matching neither condition keep NA.
data$lr_prop <- local({
    lrProp <- rep(NA, nrow(data))

    earlyBudget <- data$budget_year <= 1997
    lateBudget <- data$budget_year >= 1998

    lrProp[earlyBudget] <- data$lr_prop_previous_year[earlyBudget]
    lrProp[lateBudget] <- data$lr_prop_current_year[lateBudget]

    lrProp
})


# electoral safety measure: first preference votes as a proportion of the
# district quota
data$safety <- data$first_preference_votes / data$quota

# party size: number of rows per budget year and party, attached to every
# row of that year/party combination. merge() places the key columns first
# and sorts the result by them.
partySizeDF <- setNames(
    data.frame(table(data$budget_year, data$partyAbbrev)),
    c("budget_year", "partyAbbrev", "partySize")
)
data <- merge(x = data, y = partySizeDF, by = c("budget_year", "partyAbbrev"))
data$log.party.size <- log(data$partySize)

# county size: same construction, counting rows per budget year and county
countySizeDF <- setNames(
    data.frame(table(data$budget_year, data$county)),
    c("budget_year", "county", "countySize")
)
data <- merge(x = data, y = countySizeDF, by = c("budget_year", "county"))
data$log.county.size <- log(data$countySize)


# Each indicator below starts as a vector of zeros and is set to 1 on the
# rows where the comparison is TRUE (rows where it is NA stay 0).

# govt backbenchers
data$backbench <- replace(
    numeric(nrow(data)),
    which(data$position == "Govt backbencher"),
    1
)

# government dummy
data$government <- replace(
    numeric(nrow(data)),
    which(data$govt == "Government"),
    1
)

# economic periods: 1 for the crisis period, all other periods are the
# reference category
data$crisis <- replace(
    numeric(nrow(data)),
    which(data$periodEcon == "Crisis"),
    1
)

# log of number of debate days
data$log.debate.days <- log(data$debate.days)


# ==================
# = Year selection =
# ==================
# drop every budget year before 1987
data <- data[!(data$budget_year < 1987), ]


# ===================
# = SAVED DATA SETS =
# ===================

# Shared post-processing for the two saved data sets.
#
# The full and the speakers-only data set previously went through two
# verbatim copies of this code; keeping it in one place guarantees both are
# built the same way. All statistics are computed from the rows of `df`
# only, so each data set is standardized against its own sample.
#
# Args:
#   df: data frame with the columns lr_prop, safety, seniorityYears,
#       budget_year and memberID.
# Returns:
#   `df` with the added columns lr_propScaled, safetyScaled,
#   seniorityYearsScaled, lr_prop_yearScaled, unemployment and m. Because the
#   member index is attached with merge(), memberID becomes the first column
#   and rows are sorted by it.
addScaledVariables <- function(df) {
    # standardize: scale() subtracts the mean AND divides by the standard
    # deviation, so these are z-scores rather than merely mean-centered values
    zScore <- function(x) as.numeric(scale(x, center = TRUE, scale = TRUE))

    df$lr_propScaled <- zScore(df$lr_prop)
    df$safetyScaled <- zScore(df$safety)
    df$seniorityYearsScaled <- zScore(df$seniorityYears)
    #df$partySizeScaled <- zScore(df$partySize)

    # standardize the unemployment measure separately within each budget year
    df$lr_prop_yearScaled <- NA
    for (year in unique(df$budget_year)) {
        inYear <- df$budget_year == year
        df$lr_prop_yearScaled[inYear] <- zScore(df$lr_prop[inYear])
    }

    df$unemployment <- df$lr_propScaled

    # consecutive memberID: rank of each distinct memberID among all distinct
    # memberIDs in `df`. The join key is named explicitly so the merge cannot
    # pick up any other column the two data frames happen to share.
    memberIDs <- unique(df$memberID)
    memberIndex <- data.frame(memberID = memberIDs, m = rank(memberIDs))
    merge(df, memberIndex, by = "memberID")
}

# save full data set
# ------------------
dataAll <- addScaledVariables(data)

save(dataAll, file = dataOutAll)


# save data set with only those TDs who spoke
# -------------------------------------------
dataSub <- addScaledVariables(data[data$spoke == 1, ])

save(dataSub, file = dataOutSub)

